day 18 利用yolo結合AI辨識動物情緒

2024 iThome 鐵人賽

DAY 18

AI/ ML & Data

基於人工智慧與深度學習對斑馬魚做行為分析系列第 18 篇

16th鐵人賽

neilsu02

2024-08-20 23:06:53

193 瀏覽

分享至

今天是第十八天，我們可以寫一個結合 YOLO 與 AI 模型的程式來辨識動物的情緒，以下是程式碼：

import cv2
import torch
from torchvision import transforms
from PIL import Image
import torch.nn as nn

# Step 1: 使用 YOLOv5 進行動物偵測
def detect_animal(image_path, yolo_model, animal_classes=None):
    """Detect animals in an image file and return one cropped region per hit.

    Args:
        image_path: Path of the image file; it is read with ``cv2.imread``.
        yolo_model: YOLOv5 model as returned by ``torch.hub.load``. It is
            called on an RGB array and must expose ``names`` and return
            results that have an ``xyxy`` attribute.
        animal_classes: Optional collection of class names to keep. When
            omitted, the animal categories of the COCO label set are used,
            plus the literal ``'animal'`` so that custom-trained models
            using that single label keep working.

    Returns:
        A list of NumPy arrays, each a crop of the loaded image. The crops
        keep OpenCV's BGR channel order. The list is empty when no detection
        matches.

    Raises:
        OSError: If the image cannot be read from ``image_path``.
    """
    if animal_classes is None:
        # The stock COCO-trained checkpoints have no generic 'animal' class,
        # so comparing against that name alone never matches anything.
        animal_classes = (
            'animal',
            'bird', 'cat', 'dog', 'horse', 'sheep',
            'cow', 'elephant', 'bear', 'zebra', 'giraffe',
        )
    wanted = frozenset(animal_classes)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path!r}")

    # OpenCV loads BGR, while YOLOv5 expects RGB for NumPy input.
    results = yolo_model(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    height, width = image.shape[:2]
    detected_animals = []
    for detection in results.xyxy[0]:
        # Indices 0-3 are the box corners and index 5 is the class index.
        label = int(detection[5])
        if yolo_model.names[label] not in wanted:
            continue

        xmin, ymin, xmax, ymax = (int(value) for value in detection[:4])
        # Clamp to the image so a negative start cannot wrap around.
        xmin, ymin = max(xmin, 0), max(ymin, 0)
        xmax, ymax = min(xmax, width), min(ymax, height)
        if xmax <= xmin or ymax <= ymin:
            # Integer truncation can collapse a tiny box to zero area.
            continue

        detected_animals.append(image[ymin:ymax, xmin:xmax])

    return detected_animals

# Step 2: 辨識動物情緒 (假設有一個預訓練的情緒辨識模型)
class EmotionRecognitionModel(nn.Module):
    """Small CNN classifier: one conv layer, one max-pool, one linear layer.

    Args:
        num_classes: Number of emotion classes to output. Defaults to 3
            (happy, angry, fearful).
        input_size: Height and width of the square RGB input. Defaults to
            128, which gives the 16 * 64 * 64 flattened feature size.
    """

    def __init__(self, num_classes=3, input_size=128):
        super().__init__()
        # The 2x2 pool with stride 2 halves each spatial dimension.
        pooled_size = input_size // 2
        self.flat_features = 16 * pooled_size * pooled_size

        self.conv = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc = nn.Linear(self.flat_features, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for a batch of images.

        Raises:
            ValueError: If the spatial size of ``x`` does not match the
                ``input_size`` the model was built for.
        """
        x = self.pool(torch.relu(self.conv(x)))
        # Flatten per sample. A ``view(-1, N)`` would fold a wrongly sized
        # input into extra batch rows instead of failing.
        x = torch.flatten(x, start_dim=1)
        if x.shape[1] != self.flat_features:
            raise ValueError(
                f"expected {self.flat_features} features per sample after "
                f"pooling, got {x.shape[1]}; check the input image size"
            )
        return self.fc(x)

def recognize_emotion(animal_image, emotion_model):
    """Classify the emotion shown in a cropped animal image.

    Args:
        animal_image: NumPy image array in OpenCV (BGR) channel order, such
            as a crop cut from an image loaded with ``cv2.imread``.
        emotion_model: Classifier taking a (1, 3, 128, 128) float tensor and
            returning one score per emotion. It is switched to eval mode.

    Returns:
        One of ``'Happy'``, ``'Angry'`` or ``'Fearful'``.

    Raises:
        ValueError: If ``animal_image`` is ``None`` or has no pixels.
    """
    if animal_image is None or animal_image.size == 0:
        raise ValueError("animal_image is empty; nothing to classify")

    # Resize to the fixed input size and scale to a [0, 1] float tensor.
    preprocess = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ])

    # PIL treats a 3-channel array as RGB, but OpenCV crops are BGR.
    # Without this conversion red and blue would be swapped.
    if animal_image.ndim == 3 and animal_image.shape[2] == 3:
        animal_image = cv2.cvtColor(animal_image, cv2.COLOR_BGR2RGB)

    image = Image.fromarray(animal_image)
    input_tensor = preprocess(image).unsqueeze(0)  # add the batch dimension

    # Inference only, so gradients are not tracked.
    emotion_model.eval()
    with torch.no_grad():
        output = emotion_model(input_tensor)

    # The highest-scoring class index selects the label.
    predicted = output.argmax(dim=1)
    return ['Happy', 'Angry', 'Fearful'][predicted.item()]

# 主程式
if __name__ == "__main__":
    # Image to analyse.
    image_path = 'animal.jpg'

    # YOLOv5-small detector fetched through PyTorch Hub.
    yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

    # NOTE(review): the emotion model is constructed without loading any
    # weights here; a trained checkpoint presumably has to be loaded before
    # the predictions mean anything.
    emotion_model = EmotionRecognitionModel()

    # Stage 1: crop every detected animal out of the image.
    detected_animals = detect_animal(image_path, yolo_model)

    # Stage 2: classify each crop lazily and report it as soon as it is ready.
    emotions = (
        recognize_emotion(crop, emotion_model) for crop in detected_animals
    )
    for idx, emotion in enumerate(emotions):
        print(f"動物 {idx + 1} 的情緒是: {emotion}")

1. `import` 區塊

import cv2
import torch
from torchvision import transforms
from PIL import Image
import torch.nn as nn

cv2：OpenCV的Python接口，用來處理影像讀取、顯示和操作。
torch：PyTorch框架，用來處理深度學習模型的構建和訓練。
transforms：來自torchvision，用於圖像的預處理，如大小調整、轉換為Tensor等。
Image：來自PIL，用於讀取和處理圖像。
nn：PyTorch中的神經網絡模組，用來構建深度學習模型。

2. YOLO 動物偵測 (`detect_animal` 函數)

def detect_animal(image_path, yolo_model, animal_classes=None):
    """Detect animals in an image file and return one cropped region per hit.

    Args:
        image_path: Path of the image file; it is read with ``cv2.imread``.
        yolo_model: YOLOv5 model as returned by ``torch.hub.load``. It is
            called on an RGB array and must expose ``names`` and return
            results that have an ``xyxy`` attribute.
        animal_classes: Optional collection of class names to keep. When
            omitted, the animal categories of the COCO label set are used,
            plus the literal ``'animal'`` so that custom-trained models
            using that single label keep working.

    Returns:
        A list of NumPy arrays, each a crop of the loaded image. The crops
        keep OpenCV's BGR channel order. The list is empty when no detection
        matches.

    Raises:
        OSError: If the image cannot be read from ``image_path``.
    """
    if animal_classes is None:
        # The stock COCO-trained checkpoints have no generic 'animal' class,
        # so comparing against that name alone never matches anything.
        animal_classes = (
            'animal',
            'bird', 'cat', 'dog', 'horse', 'sheep',
            'cow', 'elephant', 'bear', 'zebra', 'giraffe',
        )
    wanted = frozenset(animal_classes)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path!r}")

    # OpenCV loads BGR, while YOLOv5 expects RGB for NumPy input.
    results = yolo_model(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    height, width = image.shape[:2]
    detected_animals = []
    for detection in results.xyxy[0]:
        # Indices 0-3 are the box corners and index 5 is the class index.
        label = int(detection[5])
        if yolo_model.names[label] not in wanted:
            continue

        xmin, ymin, xmax, ymax = (int(value) for value in detection[:4])
        # Clamp to the image so a negative start cannot wrap around.
        xmin, ymin = max(xmin, 0), max(ymin, 0)
        xmax, ymax = min(xmax, width), min(ymax, height)
        if xmax <= xmin or ymax <= ymin:
            # Integer truncation can collapse a tiny box to zero area.
            continue

        detected_animals.append(image[ymin:ymax, xmin:xmax])

    return detected_animals

image_path：傳入影像的路徑。
yolo_model：使用預先訓練好的YOLO模型來進行物體偵測。
cv2.imread(image_path)：使用OpenCV讀取影像並存儲在image變數中。讀入的陣列通道順序為BGR；若檔案不存在或無法解碼，會回傳None而不是拋出例外。
yolo_model(image)：使用YOLO模型對影像進行偵測，返回結果results。
results.xyxy[0]：從YOLO的輸出中取出第一張影像的偵測結果，每一列依序為 xmin、ymin、xmax、ymax、信心分數、類別索引。
label：提取YOLO預測的物體標籤（如狗、貓等）。
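if yolo_model.names[label] == 'animal'：檢查偵測到的物體類別名稱是否為 'animal'。注意：預訓練的 yolov5s 使用 COCO 資料集的類別（如 dog、cat、bird、horse），其中並沒有 'animal' 這個類別，因此必須使用自行訓練、含有 'animal' 標籤的模型，或改為比對具體的動物類別名稱，否則不會有任何偵測結果。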
xmin, ymin, xmax, ymax：邊界框座標，定義了動物在圖像中的位置。
detected_animals.append(image[ymin:ymax, xmin:xmax])：將偵測到的動物圖像區域存儲在detected_animals列表中。
return detected_animals：返回偵測到的所有動物圖像區域。

3. 情緒辨識模型 (`EmotionRecognitionModel` 類)

class EmotionRecognitionModel(nn.Module):
    """Small CNN classifier: one conv layer, one max-pool, one linear layer.

    Args:
        num_classes: Number of emotion classes to output. Defaults to 3
            (happy, angry, fearful).
        input_size: Height and width of the square RGB input. Defaults to
            128, which gives the 16 * 64 * 64 flattened feature size.
    """

    def __init__(self, num_classes=3, input_size=128):
        super().__init__()
        # The 2x2 pool with stride 2 halves each spatial dimension.
        pooled_size = input_size // 2
        self.flat_features = 16 * pooled_size * pooled_size

        self.conv = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc = nn.Linear(self.flat_features, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for a batch of images.

        Raises:
            ValueError: If the spatial size of ``x`` does not match the
                ``input_size`` the model was built for.
        """
        x = self.pool(torch.relu(self.conv(x)))
        # Flatten per sample. A ``view(-1, N)`` would fold a wrongly sized
        # input into extra batch rows instead of failing.
        x = torch.flatten(x, start_dim=1)
        if x.shape[1] != self.flat_features:
            raise ValueError(
                f"expected {self.flat_features} features per sample after "
                f"pooling, got {x.shape[1]}; check the input image size"
            )
        return self.fc(x)

nn.Module：所有PyTorch模型的基類。
Conv2d：2D卷積層，用於提取影像中的特徵。3代表輸入影像有3個通道（RGB），16代表輸出16個特徵圖，kernel_size=3代表卷積核大小為3x3。
MaxPool2d：最大池化層，用於減少特徵圖的尺寸並保留最重要的特徵。
Linear：全連接層，用於將特徵映射到情緒類別的輸出。輸入維度 16 * 64 * 64 的由來是：128x128 的輸入影像經過一次 2x2 池化後變成 64x64，共有 16 個特徵圖。這裡假設輸出3種情緒（快樂、憤怒、恐懼）。
forward 函數：定義了前向傳播的過程。影像經過卷積、ReLU激活函數、池化、展平成向量，然後通過全連接層進行情緒分類。

4. 辨識動物情緒 (`recognize_emotion` 函數)

def recognize_emotion(animal_image, emotion_model):
    """Classify the emotion shown in a cropped animal image.

    Args:
        animal_image: NumPy image array in OpenCV (BGR) channel order, such
            as a crop cut from an image loaded with ``cv2.imread``.
        emotion_model: Classifier taking a (1, 3, 128, 128) float tensor and
            returning one score per emotion. It is switched to eval mode.

    Returns:
        One of ``'Happy'``, ``'Angry'`` or ``'Fearful'``.

    Raises:
        ValueError: If ``animal_image`` is ``None`` or has no pixels.
    """
    if animal_image is None or animal_image.size == 0:
        raise ValueError("animal_image is empty; nothing to classify")

    # Resize to the fixed input size and scale to a [0, 1] float tensor.
    preprocess = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ])

    # PIL treats a 3-channel array as RGB, but OpenCV crops are BGR.
    # Without this conversion red and blue would be swapped.
    if animal_image.ndim == 3 and animal_image.shape[2] == 3:
        animal_image = cv2.cvtColor(animal_image, cv2.COLOR_BGR2RGB)

    image = Image.fromarray(animal_image)
    input_tensor = preprocess(image).unsqueeze(0)  # add the batch dimension

    # Inference only, so gradients are not tracked.
    emotion_model.eval()
    with torch.no_grad():
        output = emotion_model(input_tensor)

    # The highest-scoring class index selects the label.
    predicted = output.argmax(dim=1)
    return ['Happy', 'Angry', 'Fearful'][predicted.item()]

preprocess：一組圖像轉換操作，將圖像大小調整為128x128，並轉換為Tensor。
Image.fromarray(animal_image)：將Numpy陣列（OpenCV圖像）轉換為PIL圖像，以便進行預處理。要注意OpenCV的通道順序是BGR，而PIL會把三通道陣列視為RGB，若未先轉換，紅色與藍色通道會被對調。
preprocess(image).unsqueeze(0)：將圖像轉換為Tensor，並增加一個批次維度（因為模型期望輸入有一個批次維度）。
emotion_model.eval()：將模型設置為評估模式，停用dropout等訓練特定的層。
with torch.no_grad()：停用自動梯度計算，節省記憶體並加速推理。
output = emotion_model(input_tensor)：將預處理後的圖像輸入到情緒辨識模型中，得到模型的輸出。
torch.max(output, 1)：找到輸出中最大值的索引，這個索引對應於預測的情緒類別。
predicted.item()：將Tensor轉換為普通的Python數值。
emotion = ['Happy', 'Angry', 'Fearful'][predicted.item()]：將數值索引映射到對應的情緒標籤。

5. 主程式

if __name__ == "__main__":
    # Image to analyse.
    image_path = 'animal.jpg'

    # YOLOv5-small detector fetched through PyTorch Hub.
    yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

    # NOTE(review): the emotion model is constructed without loading any
    # weights here; a trained checkpoint presumably has to be loaded before
    # the predictions mean anything.
    emotion_model = EmotionRecognitionModel()

    # Stage 1: crop every detected animal out of the image.
    detected_animals = detect_animal(image_path, yolo_model)

    # Stage 2: classify each crop lazily and report it as soon as it is ready.
    emotions = (
        recognize_emotion(crop, emotion_model) for crop in detected_animals
    )
    for idx, emotion in enumerate(emotions):
        print(f"動物 {idx + 1} 的情緒是: {emotion}")

torch.hub.load('ultralytics/yolov5', 'yolov5s')：從PyTorch Hub加載預訓練的YOLOv5模型，用於物體偵測。
EmotionRecognitionModel()：初始化情緒辨識模型。這裡只建立了模型結構，權重是隨機初始化的；實際使用前需要先載入訓練好的權重，預測結果才有意義。
image_path = 'animal.jpg'：設定影像檔案的路徑。
detect_animal(image_path, yolo_model)：使用YOLO模型偵測影像中的動物。
for idx, animal_image in enumerate(detected_animals)：遍歷所有偵測到的動物圖像區域，並使用情緒辨識模型進行情緒預測。
print(f"動物 {idx + 1} 的情緒是: {emotion}")：輸出每個偵測到的動物的情緒結果。